Loading the data files into R and attaching the required libraries

library(ggplot2)
# Point the session at the project folder, then read the feeding table and
# the master table. Both files are read with their first row as the header.
# NOTE(review): the absolute path only exists on the author's machine; anyone
# else running this needs to adjust it.
setwd("/Users/chanceyan/Documents/R/ThesisCrab")
# `header = TRUE` is spelled out: `h = T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
crabdata <- read.csv("Feeding.csv", header = TRUE)
master <- read.csv("Master.csv", header = TRUE)

We need to clean up the data first to use it.

# Coerce the food weights to numeric. Entries that cannot be parsed as a
# number become NA, which is what the coercion warning recorded below reports.
crabdata$FoodIn <- as.numeric(as.character(crabdata$FoodIn))
## Warning: NAs introduced by coercion
crabdata$FoodOut <- as.numeric(as.character(crabdata$FoodOut))

# A row-by-row loop used to follow here. It called is.numeric() on columns 4
# and 6 and deleted rows from crabdata while iterating over 1:nrow(crabdata).
# is.numeric() reports the type of the column, not whether a value is missing,
# so on numeric columns the loop never removed anything; on a non-numeric
# column it would have deleted rows indiscriminately while the indices shifted
# underneath it. It has been removed. Rows with NA weights are kept at this
# point; records with a missing amount eaten are dropped in the cleaning step
# that follows the merge.
# NOTE(review): assumes columns 4 and 6 were FoodIn and FoodOut -- confirm.

# Create a number ID for each crab from characters 3-5 of its ID so it is
# easier to loop through individuals in later code. A proportion-eaten column
# is also added further down so there is a fair metric to compare by.
# substr() is vectorised, so every row of master is handled in one call; the
# previous loop was hard-coded to exactly 36 rows and would error on a shorter
# table or leave recycled values in the extra rows of a longer one.
master$num <- substr(master$ID, 3, 5)

# Amount eaten per record: FoodIn minus FoodOut.
crabdata$amount.eaten <- with(crabdata, FoodIn - FoodOut)
# Join the master columns onto every feeding record, matching on ID. Columns
# present in both tables come back with .x (crabdata) and .y (master) suffixes.
all <- merge(x = crabdata, y = master, by = "ID")
# Express the amount eaten relative to WW.
all$proportion.eaten <- with(all, amount.eaten / WW)
# Turn the month/day/four-digit-year strings in Date.x into Date values so R
# treats the column as dates.
all$Date.x <- as.Date(all$Date.x, format = "%m/%d/%Y")

#This code will graph each individual
#for(i in 1:36){
#  craby <- subset(all, all$num == i)
#  graph <- ggplot(aes(x = as.factor(Date.x), y = proportion.eaten), data = craby) + geom_point() + ggtitle(craby$ID)
#  print(graph)
#}

Graphing each individual clearly.

# Number of records for every species/trial combination.
table(all$Species.x, all$Trial.x)
##     
##       16  18  20  22  24  26
##   CI  61  54  90 101  88  91
##   CM  60  54  90 108  78 105
##   HS  59  54  90 119  90 102

# Builds the per-individual figure used for each species below: proportion
# eaten over time, one coloured point-and-line trace per ID, one panel per
# trial. Returns the ggplot object so a top-level call prints it.
individual_traces <- function(crabs) {
  ggplot(
    data = crabs,
    mapping = aes(x = Date.x, y = proportion.eaten, colour = ID, group = ID)
  ) +
    geom_point() +
    geom_line() +
    facet_wrap(~Trial.x)
}

craby <- subset(all, Species.x == "CI")
individual_traces(craby)
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`geom_line()`).

craby <- subset(all, Species.x == "CM")
individual_traces(craby)
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

craby <- subset(all, Species.x == "HS")
individual_traces(craby)
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Graphing everyone: boxplots of proportion eaten per date, filled by Food and
# laid out as a Period (rows) by Species.x (columns) grid. One figure per
# trial, followed by one figure with all trials pooled.

# Builds the boxplot grid for `df`; a title is added only when one is given.
food_boxplot <- function(df, title = NULL) {
  p <- ggplot(
    df,
    aes(x = as.factor(Date.x), y = proportion.eaten, fill = Food)
  ) +
    geom_boxplot() +
    facet_grid(Period ~ Species.x)
  if (is.null(title)) {
    return(p)
  }
  p + ggtitle(title)
}

holddf <- subset(all, Trial.x == "16")
food_boxplot(holddf, "16")

holddf <- subset(all, Trial.x == "18")
food_boxplot(holddf, "18")

holddf <- subset(all, Trial.x == "20")
food_boxplot(holddf, "20")
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

holddf <- subset(all, Trial.x == "22")
food_boxplot(holddf, "22")

holddf <- subset(all, Trial.x == "24")
food_boxplot(holddf, "24")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

holddf <- subset(all, Trial.x == "26")
food_boxplot(holddf, "26")
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# All trials together, no title.
food_boxplot(all)
## Warning: Removed 41 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Graphing by species so it is easier to see: per-date boxplots of proportion
# eaten, filled by Period, one panel per trial with its own x axis.
holddf <- subset(all, Species.y == "Irroratus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Cancer")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

holddf <- subset(all, Species.y == "Carcinus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Green Crab")
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Hemi")
## Warning: Removed 14 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

The data do not look clean, so I will clean them first by removing the records where the crab ate everything. I will also set the proportion eaten to 0 for those that did not eat.

# Clean the merged data in one vectorised pass, driven by the codes in Notes.x:
#   "DE" (did not eat)      -> proportion eaten is set to 0
#   "AE" (ate everything)   -> record is dropped
#   "DI"                    -> record is dropped
# In addition, records with a missing amount eaten are dropped and negative
# amounts are treated as a proportion of 0.
#
# This replaces a loop that deleted rows from `all` while iterating over
# 1:nrow(all). Each deletion shifted the remaining rows up by one, so the row
# after every deleted row was never checked, and the later checks in the same
# iteration were applied to a different row than the one that matched. Logical
# masks evaluate every row exactly once, so row counts can differ from output
# recorded with the old loop.
note_text <- as.character(all$Notes.x)

# grepl() returns FALSE for NA notes, so records without a note are untouched.
did_not_eat <- grepl("DE", note_text, fixed = TRUE)
ate_everything <- grepl("AE", note_text, fixed = TRUE)
noted_di <- grepl("DI", note_text, fixed = TRUE)

missing_amount <- is.na(all$amount.eaten)
negative_amount <- !missing_amount & all$amount.eaten < 0

all$proportion.eaten[did_not_eat | negative_amount] <- 0
all <- all[!(ate_everything | noted_di | missing_amount), ]

# Re-running the per-species boxplots on the cleaned data: proportion eaten
# per date, filled by Period, one free-x panel per trial.
holddf <- subset(all, Species.y == "Irroratus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Cancer")

holddf <- subset(all, Species.y == "Carcinus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Green Crab")

holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Hemi")

Looking better! But maybe we can still adjust outliers. Let’s try removing those that molted.

# Remove every record of any crab that has at least one note containing "ME"
# (molted), not just the record carrying the note.
#
# The earlier version collected row indices in a vector called `array` and
# then ran all[-array, ]. When no note matched, `array` stayed empty and the
# negative index selected zero rows, which wiped the whole data frame. It was
# also quadratic in the number of rows and shadowed base::array(). Filtering
# on ID membership leaves the data untouched when nothing matches and needs no
# index bookkeeping.
molted_ids <- unique(all$ID[grepl("ME", all$Notes.x, fixed = TRUE)])
all <- all[!(all$ID %in% molted_ids), ]

# Re-running the per-species boxplots after the molted crabs were removed:
# proportion eaten per date, filled by Period, one free-x panel per trial.
holddf <- subset(all, Species.y == "Irroratus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Cancer")

holddf <- subset(all, Species.y == "Carcinus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Green Crab")

holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(
  holddf,
  aes(x = as.factor(Date.x), y = proportion.eaten, fill = as.factor(Period))
) +
  geom_boxplot() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Hemi")

Additional things to look at - size versus proportion eaten

# CL against WW, coloured by species.
ggplot(all, aes(x = WW, y = CL, color = Species.x)) +
  geom_point()

# Proportion eaten against CL with a linear fit, one free-x panel per species.
ggplot(all, aes(x = CL, y = proportion.eaten, color = Species.x)) +
  geom_point() +
  facet_wrap(~Species.x, scales = "free_x") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# Per species: proportion eaten against WW, coloured by Period, with a linear
# fit per colour group and one free-x panel per trial.
holddf <- subset(all, Species.y == "Irroratus")
ggplot(
  holddf,
  aes(x = WW, y = proportion.eaten, color = as.factor(Period))
) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Cancer") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

holddf <- subset(all, Species.y == "Carcinus")
ggplot(
  holddf,
  aes(x = WW, y = proportion.eaten, color = as.factor(Period))
) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Carcinus") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

holddf <- subset(all, Species.y == "Hemigrapsus")
ggplot(
  holddf,
  aes(x = WW, y = proportion.eaten, color = as.factor(Period))
) +
  geom_point() +
  facet_wrap(~Trial.y, scales = "free_x") +
  ggtitle("Hemigrapsus") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# All species in one panel with a loess smoother per species.
ggplot(all, aes(x = WW, y = proportion.eaten, color = Species.x)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'

# Same data split into one free-x panel per species, with a black linear fit.
ggplot(all, aes(x = WW, y = proportion.eaten, color = Species.x)) +
  geom_point() +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~Species.x, scales = "free_x")
## `geom_smooth()` using formula = 'y ~ x'

Get last 3 values to average out and compare among species and trial.

# Accumulators for one summary row per species/trial/period combination. Each
# starts with a single NA placeholder; that placeholder row is removed by the
# na.omit() call at the end.
avg_list <- NA
species_list <- NA
trial_list <- NA
period_list <- NA
sample_list <- NA

preload_spp <- c("CI", "CM", "HS")
preload_tr <- c("26", "24", "22", "20", "18", "16")

for (spp_code in preload_spp) {
  for (trial_code in preload_tr) {
    for (i in 1:2) {
      # Records for this species, trial and period.
      holddf <- subset(
        all,
        Species.x == spp_code & Trial.x == trial_code & Period == i
      )
      holddf$Date.x <- as.Date(holddf$Date.x)

      # Keep only the (up to) three most recent distinct dates.
      last.dates <- tail(sort(unique(holddf$Date.x)), 3)
      holddf <- subset(
        holddf,
        Date.x == last.dates[1] | Date.x == last.dates[2] | Date.x == last.dates[3]
      )

      # Mean proportion eaten, the group labels taken from the first row, and
      # the number of rows that have no NA in any column.
      avg_list <- c(avg_list, mean(holddf$proportion.eaten))
      species_list <- c(species_list, holddf$Species.x[1])
      trial_list <- c(trial_list, holddf$Trial.x[1])
      period_list <- c(period_list, holddf$Period[1])
      sample_list <- c(sample_list, nrow(na.omit(holddf)))
    }
  }
}

# Assemble the summary table; na.omit() drops the placeholder row and any
# combination whose mean or labels came out as NA.
average_df <- na.omit(
  data.frame(avg_list, species_list, trial_list, period_list, sample_list)
)

# Distribution of the last-three-date averages per species, coloured by trial.
ggplot(
  average_df,
  aes(x = species_list, y = avg_list, color = as.factor(trial_list))
) +
  geom_boxplot()

# Same figure, split into one panel per period.
ggplot(
  average_df,
  aes(x = species_list, y = avg_list, color = as.factor(trial_list))
) +
  geom_boxplot() +
  facet_wrap(~period_list)

Checking Sample size

# Records left per species (rows) and trial (columns) after cleaning.
table(all[["Species.x"]], all[["Trial.x"]])
##     
##       16  18  20  22  24  26
##   CI  60  54  82 100  80  91
##   CM  54  52  82  94  74  95
##   HS  52  54  74 101  75  65

Let’s look into the effect of size on the proportion eaten.

# Simple linear regression of proportion eaten on WW alone; coef() prints the
# fitted intercept and slope.
mod <- lm(all$proportion.eaten~all$WW)
coef(mod)
##   (Intercept)        all$WW 
##  0.0918113225 -0.0003709927
# Additive model with WW, species and trial as predictors. No family is given,
# so glm() fits its default gaussian model (see the dispersion line in the
# summary below). Trial.x receives a single coefficient, i.e. it enters as a
# numeric covariate, and there is no WW-by-species interaction term, so one WW
# slope is shared by all species while each species gets its own offset
# relative to CI.
mod1 <- glm(all$proportion.eaten ~ all$WW + all$Species.x + all$Trial.x)
summary(mod1)
## 
## Call:
## glm(formula = all$proportion.eaten ~ all$WW + all$Species.x + 
##     all$Trial.x)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -3.230e-03  9.537e-03  -0.339  0.73489    
## all$WW          -8.067e-05  2.658e-05  -3.035  0.00245 ** 
## all$Species.xCM  1.735e-02  4.314e-03   4.022 6.09e-05 ***
## all$Species.xHS  8.057e-02  5.932e-03  13.584  < 2e-16 ***
## all$Trial.x      1.656e-03  3.600e-04   4.598 4.66e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.001826013)
## 
##     Null deviance: 4.6314  on 1338  degrees of freedom
## Residual deviance: 2.4359  on 1334  degrees of freedom
## AIC: -4636.3
## 
## Number of Fisher Scoring iterations: 2

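Seems like weight has an effect on the proportional amount eaten. The proportional amount eaten also differs between species; note that the model above is additive, so it estimates a separate offset for each species rather than a separate slope, and testing whether the slope between weight and proportional amount eaten changes between species would require a weight-by-species interaction term. Furthermore, the trial that each individual is in affects the proportional amount eaten.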